library(Seurat)
library(tidyverse)
library(SingleR)
library(celldex)
library(knitr)
library(Azimuth)
library(cowplot)
library(grDevices)
set.seed(4)
# Top-level folder where script outputs should be stored
script_output_dir <- file.path(here::here(), "output")
day3_clstr <- readRDS(file = file.path(script_output_dir, "processed_data/5_singlets_with_adt_d3.rds"))
day15_clstr <- readRDS(file = file.path(script_output_dir, "processed_data/5_singlets_with_adt_d15.rds"))
Starting number of cells
length(day3_clstr$orig.ident)
## [1] 12065
length(day15_clstr$orig.ident)
## [1] 4691
Use SingleR for high-level clusters.
# Load reference data
# Note, must downgrade dbplyr to version 2.3.4 to avoid 'collect()' error
ref_data <- HumanPrimaryCellAtlasData(ensembl = FALSE, cell.ont = "all")
Day 3
# Note that better to use raw counts than SCTransform data (see SingleR issue #98)
d3_norm_count <- GetAssayData(day3_clstr, layer = "counts", assay = "RNA")
# Annotate
d3_predictions <- SingleR(test = d3_norm_count,
ref = ref_data,
labels = ref_data$label.main)
day3_clstr[["SingleR_labels"]] <- d3_predictions$labels
day3_clstr[["SingleR_prunedlabels"]] <- d3_predictions$pruned.labels
# QC: Confidence scores
d3_clusts <- day3_clstr$seurat_clusters
plotScoreHeatmap(d3_predictions,
clusters = d3_clusts,
order_by = "clusters")
# QC: Number of cells it couldn't annotate
summary(is.na(day3_clstr$SingleR_prunedlabels))
## Mode FALSE TRUE
## logical 12061 4
Visualize
Idents(day3_clstr) <- "SingleR_prunedlabels"
DimPlot(day3_clstr, group.by = "SingleR_prunedlabels") + ggtitle("Day 3 SingleR (Human Cell Atlas)")
# Highlight cells it couldn't annotate
d3_unannot <- day3_clstr@meta.data[is.na(day3_clstr@meta.data$SingleR_prunedlabels),]
d3_num_unannot <- as.character(nrow(d3_unannot))
DimPlot(day3_clstr, cells.highlight = rownames(d3_unannot)) +
NoLegend() +
ggtitle(paste0("Day 3: Cells without SingleR pruned label (n=", d3_num_unannot, ")"))
Day 15
# Note that better to use raw counts than SCTransform data (see SingleR issue #98)
d15_norm_count <- GetAssayData(day15_clstr, layer = "counts", assay = "RNA")
# Annotate
d15_predictions <- SingleR(test = d15_norm_count, ref = ref_data,
labels = ref_data$label.main)
day15_clstr[["SingleR_labels"]] <- d15_predictions$labels
day15_clstr[["SingleR_prunedlabels"]] <- d15_predictions$pruned.labels
# QC: Confidence scores
d15_clusts <- day15_clstr$seurat_clusters
plotScoreHeatmap(d15_predictions,
clusters = d15_clusts,
order_by = "clusters")
# QC: Number of cells it couldn't annotate
summary(is.na(d15_predictions$pruned.labels))
## Mode FALSE TRUE
## logical 4635 56
Visualize
Idents(day15_clstr) <- "SingleR_prunedlabels"
DimPlot(day15_clstr) + ggtitle("Day 15 SingleR (Human Cell Atlas)")
# Highlight cells it couldn't annotate
d15_unannot <- day15_clstr@meta.data[is.na(day15_clstr@meta.data$SingleR_prunedlabels),]
d15_num_unannot <- as.character(nrow(d15_unannot))
DimPlot(day15_clstr, cells.highlight = rownames(d15_unannot)) +
NoLegend() +
ggtitle(paste0("Day 15: Cells without SingleR pruned label (n=", d15_num_unannot, ")"))
54 seems like a lot, I see these are mostly T cells and NK cells. Will see what Azimuth comes up with for annotation.
# SingleR labels of these cells that don't have pruned labels
table(d15_unannot$SingleR_labels)
##
## DC Macrophage NK_cell T_cells
## 1 2 20 33
Where are these NK cells located in the UMAP?
unannot_nk <- d15_unannot %>%
filter(SingleR_labels == "NK_cell") %>%
rownames()
DimPlot(day15_clstr, cells.highlight = unannot_nk) +
NoLegend() +
ggtitle(paste0("Day 15: Cells with NK_cell label and no pruned label (n=",
length(unannot_nk), ")"))
lymphoid = c("T_cells", "NK_cell", "B_cell", "Pre-B_cell_CD34-", "Pro-B_cell_CD34+",
"HSC_CD34+")
myeloid = c("Monocyte", "Macrophage", "DC", "Neutrophils",
"GMP", # Granulocyte-Macrophage Progenitor?
"CMP") # Common Myeloid Progenitor?
muscle = c("Fibroblasts", "Smooth_muscle_cells", "Tissue_stem_cells", "iPS_cells")
connective = c("Chondrocytes", "Osteoblasts", "MSC")
nervous = c("Neurons", "Astrocyte")
epithelial = c("Epithelial_cells")
endothelial = c("Endothelial_cells")
day3_clstr@meta.data <- day3_clstr@meta.data %>%
mutate(eb_atlas_idents = case_when(
SingleR_prunedlabels %in% lymphoid ~ "Lymphoid",
SingleR_prunedlabels %in% myeloid ~ "Myeloid",
SingleR_prunedlabels %in% muscle ~ "Muscle tissue",
SingleR_prunedlabels %in% connective ~ "Connective tissue",
SingleR_prunedlabels %in% nervous ~ "Nervous tissue",
SingleR_prunedlabels %in% epithelial ~ "Epithelial",
SingleR_prunedlabels %in% endothelial ~ "Endothelial",
is.na(SingleR_prunedlabels) ~ "Unknown",
.default = SingleR_prunedlabels
))
day15_clstr@meta.data <- day15_clstr@meta.data %>%
mutate(eb_atlas_idents = case_when(
SingleR_prunedlabels %in% lymphoid ~ "Lymphoid",
SingleR_prunedlabels %in% myeloid ~ "Myeloid",
SingleR_prunedlabels %in% muscle ~ "Muscle tissue",
SingleR_prunedlabels %in% connective ~ "Connective tissue",
SingleR_prunedlabels %in% nervous ~ "Nervous tissue",
SingleR_prunedlabels %in% epithelial ~ "Epithelial",
SingleR_prunedlabels %in% endothelial ~ "Endothelial",
is.na(SingleR_prunedlabels) ~ "Unknown",
.default = SingleR_prunedlabels
))
Idents(day3_clstr) <- "eb_atlas_idents"
Idents(day15_clstr) <- "eb_atlas_idents"
Save
day3_clstr_filt <- day3_clstr
day15_clstr_filt <- day15_clstr
saveRDS(day3_clstr_filt, file = file.path(script_output_dir, "processed_data/6_singler_d3.rds"))
saveRDS(day15_clstr_filt, file = file.path(script_output_dir, "processed_data/6_singler_d15.rds"))
day3_clstr_filt <- readRDS(file = file.path(script_output_dir, "processed_data/6_singler_d3.rds"))
day15_clstr_filt <- readRDS(file = file.path(script_output_dir, "processed_data/6_singler_d15.rds"))
Idents(day3_clstr_filt) <- "eb_atlas_idents"
Idents(day15_clstr_filt) <- "eb_atlas_idents"
Visualize
day3_clstr_filt@meta.data <- day3_clstr_filt@meta.data %>%
mutate(eb_atlas_idents = factor(eb_atlas_idents,
levels = c("Keratinocytes",
"Lymphoid",
"Myeloid",
"Epithelial",
"Endothelial",
"Muscle tissue",
"Connective tissue",
"Nervous tissue",
"Unknown")))
day15_clstr_filt@meta.data <- day15_clstr_filt@meta.data %>%
mutate(eb_atlas_idents = factor(eb_atlas_idents,
levels = c("Keratinocytes",
"Lymphoid",
"Myeloid",
"Epithelial",
"Endothelial",
"Muscle tissue",
"Connective tissue",
"Nervous tissue",
"Unknown")))
Idents(day3_clstr_filt) <- "eb_atlas_idents"
Idents(day15_clstr_filt) <- "eb_atlas_idents"
clstr_colors <- c("Lymphoid" = "#F8766D", "Myeloid" = "#E68613",
"Epithelial" = "#0CB702", "Connective tissue" = "#00BE67",
"Nervous tissue" = "#A52A2A", "Endothelial" = "#00A9FF",
"Muscle tissue" = "#C77CFF", "Keratinocytes" = "#FF61CC",
"Unknown" = "gray")
d1_with_leg <- DimPlot(day3_clstr_filt, pt.size = 0.4) +
ggtitle("DAY 3") +
theme(plot.title = element_text(hjust = 0.5,
size = 16),
legend.position = "bottom",
legend.spacing.x = unit(0.4, 'cm'),
legend.text = element_text(size=12, margin = margin(t = 1))) +
scale_color_manual(values = clstr_colors) +
guides(color = guide_legend(ncol = 5,
override.aes = list(size=4))) +
scale_x_discrete("UMAP1") +
scale_y_discrete("UMAP2")
d1_with_leg
d1 <- DimPlot(day3_clstr_filt, pt.size = 0.4) +
ggtitle("DAY 3") +
theme(plot.title = element_text(hjust = 0.5,
size = 16),
legend.position = "none") +
scale_color_manual(values = clstr_colors) +
scale_x_discrete("UMAP1") +
scale_y_discrete("UMAP2")
d1
d2 <- DimPlot(day15_clstr_filt, pt.size = 0.4) +
ggtitle("DAY 15") +
theme(plot.title = element_text(hjust = 0.5,
size = 16),
legend.position = "none") +
scale_color_manual(values = clstr_colors) +
scale_x_discrete("UMAP1") +
scale_y_discrete("UMAP2")
d2
highlevel_legend <- cowplot::get_legend(d1_with_leg)
Number of cells
Day 3
length(day3_clstr_filt$orig.ident) # Day 3 total
## [1] 12065
table(day3_clstr_filt$eb_atlas_idents) %>%
kable(caption = "Day 3 high-level")
| Var1 | Freq |
|---|---|
| Keratinocytes | 2193 |
| Lymphoid | 4150 |
| Myeloid | 2507 |
| Epithelial | 1335 |
| Endothelial | 489 |
| Muscle tissue | 1092 |
| Connective tissue | 250 |
| Nervous tissue | 45 |
| Unknown | 4 |
table(day3_clstr_filt$eb_atlas_idents, day3_clstr_filt$PTID)
##
## 1 5 7 8 9 10 11 12 13 16 Doublet Negative
## Keratinocytes 45 127 421 205 205 407 0 84 196 503 0 0
## Lymphoid 274 459 218 420 553 258 0 592 688 688 0 0
## Myeloid 213 321 142 510 279 107 0 388 285 262 0 0
## Epithelial 32 95 209 85 121 312 0 106 146 229 0 0
## Endothelial 27 24 20 99 55 50 0 61 68 85 0 0
## Muscle tissue 97 112 112 130 88 83 0 98 112 260 0 0
## Connective tissue 19 42 26 39 16 5 0 36 23 44 0 0
## Nervous tissue 5 4 2 1 5 6 0 3 9 10 0 0
## Unknown 0 0 0 2 0 1 0 0 0 1 0 0
Day 15
length(day15_clstr_filt$orig.ident) # Day 15 total
## [1] 4691
table(day15_clstr_filt$eb_atlas_idents) %>%
kable(caption = "Day 15 high-level")
| Var1 | Freq |
|---|---|
| Keratinocytes | 1345 |
| Lymphoid | 1610 |
| Myeloid | 480 |
| Epithelial | 495 |
| Endothelial | 193 |
| Muscle tissue | 422 |
| Connective tissue | 63 |
| Nervous tissue | 27 |
| Unknown | 56 |
table(day15_clstr_filt$eb_atlas_idents, day15_clstr_filt$PTID)
##
## 1 5 7 8 9 10 11 12 13 16 Doublet Negative
## Keratinocytes 289 0 0 0 261 22 546 0 116 111 0 0
## Lymphoid 331 0 0 0 498 103 197 0 343 138 0 0
## Myeloid 89 0 0 0 168 38 42 0 58 85 0 0
## Epithelial 79 0 0 0 102 26 215 0 36 37 0 0
## Endothelial 53 0 0 0 16 12 83 0 18 11 0 0
## Muscle tissue 140 0 0 0 67 42 103 0 53 17 0 0
## Connective tissue 24 0 0 0 5 4 20 0 9 1 0 0
## Nervous tissue 12 0 0 0 2 1 8 0 3 1 0 0
## Unknown 27 0 0 0 7 3 8 0 6 5 0 0
Look at top 5 positive markers by logFC for each cell type.
# Day 3
singler_markers_d3 <- FindAllMarkers(day3_clstr_filt, only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.25,
# Use MAST with raw counts
assay = "RNA",
slot = "counts",
test.use = "MAST")
singler_markers_d3 %>%
group_by(cluster) %>%
slice_max(n = 5, order_by = avg_log2FC) %>%
kable(caption = "Day 3: Top 5 Markers")
| p_val | avg_log2FC | pct.1 | pct.2 | p_val_adj | cluster | gene |
|---|---|---|---|---|---|---|
| 0.0000000 | 6.565390 | 0.428 | 0.110 | 0.0000000 | Keratinocytes | CNFN |
| 0.0000000 | 6.099586 | 0.806 | 0.268 | 0.0000000 | Keratinocytes | KRTDAP |
| 0.0000000 | 5.980603 | 0.413 | 0.054 | 0.0000000 | Keratinocytes | KRT1 |
| 0.0000000 | 5.937167 | 0.417 | 0.076 | 0.0000000 | Keratinocytes | CALML5 |
| 0.0000000 | 5.523713 | 0.495 | 0.153 | 0.0000000 | Keratinocytes | S100A7 |
| 0.0000000 | 5.754953 | 0.400 | 0.009 | 0.0000000 | Lymphoid | LCK |
| 0.0000000 | 5.743634 | 0.310 | 0.007 | 0.0000000 | Lymphoid | CD3D |
| 0.0000000 | 5.463430 | 0.302 | 0.008 | 0.0000000 | Lymphoid | CD3E |
| 0.0000000 | 5.115741 | 0.276 | 0.009 | 0.0000000 | Lymphoid | SPOCK2 |
| 0.0000000 | 4.651658 | 0.259 | 0.015 | 0.0000000 | Lymphoid | CD7 |
| 0.0000000 | 7.411502 | 0.293 | 0.002 | 0.0000000 | Myeloid | LILRB4 |
| 0.0000000 | 7.161655 | 0.788 | 0.069 | 0.0000000 | Myeloid | LYZ |
| 0.0000000 | 6.865403 | 0.450 | 0.013 | 0.0000000 | Myeloid | IL1B |
| 0.0000000 | 6.553915 | 0.302 | 0.005 | 0.0000000 | Myeloid | MS4A7 |
| 0.0000000 | 6.538311 | 0.387 | 0.007 | 0.0000000 | Myeloid | SERPINA1 |
| 0.0000000 | 4.434207 | 0.285 | 0.024 | 0.0000000 | Epithelial | KRT15 |
| 0.0000000 | 3.738123 | 0.345 | 0.054 | 0.0000000 | Epithelial | LAMB3 |
| 0.0000000 | 3.689183 | 0.418 | 0.040 | 0.0000000 | Epithelial | COL17A1 |
| 0.0000000 | 3.588351 | 0.816 | 0.324 | 0.0000000 | Epithelial | S100A2 |
| 0.0000000 | 3.179110 | 0.258 | 0.042 | 0.0000000 | Epithelial | LAMC2 |
| 0.0000000 | 8.700318 | 0.317 | 0.001 | 0.0000000 | Endothelial | NR5A2 |
| 0.0000000 | 8.692648 | 0.417 | 0.002 | 0.0000000 | Endothelial | TIE1 |
| 0.0000000 | 8.577542 | 0.301 | 0.003 | 0.0000000 | Endothelial | SELE |
| 0.0000000 | 8.541233 | 0.301 | 0.001 | 0.0000000 | Endothelial | SOX17 |
| 0.0000000 | 8.472049 | 0.703 | 0.007 | 0.0000000 | Endothelial | PLVAP |
| 0.0000000 | 6.157499 | 0.291 | 0.007 | 0.0000000 | Muscle tissue | TMEM119 |
| 0.0000000 | 5.894402 | 0.640 | 0.021 | 0.0000000 | Muscle tissue | PRRX1 |
| 0.0000000 | 5.677099 | 0.501 | 0.017 | 0.0000000 | Muscle tissue | MEDAG |
| 0.0000000 | 5.594135 | 0.543 | 0.021 | 0.0000000 | Muscle tissue | COL3A1 |
| 0.0000000 | 5.578659 | 0.417 | 0.013 | 0.0000000 | Muscle tissue | PRKG1 |
| 0.0000000 | 4.705894 | 0.544 | 0.059 | 0.0000000 | Connective tissue | MT1M |
| 0.0000000 | 4.422745 | 0.416 | 0.030 | 0.0000000 | Connective tissue | PLAC9 |
| 0.0000000 | 4.358383 | 0.544 | 0.037 | 0.0000000 | Connective tissue | CXCL12 |
| 0.0000000 | 4.275321 | 0.340 | 0.024 | 0.0000000 | Connective tissue | PLA2G2A |
| 0.0000000 | 4.008408 | 0.260 | 0.023 | 0.0000000 | Connective tissue | MT1A |
| 0.0000000 | 8.811318 | 0.422 | 0.001 | 0.0000000 | Nervous tissue | CDH19 |
| 0.0000000 | 8.016902 | 0.422 | 0.002 | 0.0000000 | Nervous tissue | PLP1 |
| 0.0000000 | 6.071943 | 0.333 | 0.008 | 0.0000000 | Nervous tissue | MIA |
| 0.0000000 | 5.990907 | 0.267 | 0.006 | 0.0000000 | Nervous tissue | FRMD5 |
| 0.0000000 | 5.515328 | 0.267 | 0.011 | 0.0000000 | Nervous tissue | NRXN1 |
| 0.0001235 | 10.558062 | 0.250 | 0.000 | 1.0000000 | Unknown | HPGDS |
| 0.0077887 | 10.558062 | 0.250 | 0.000 | 1.0000000 | Unknown | AL022341.2 |
| 0.0089853 | 10.236134 | 0.250 | 0.000 | 1.0000000 | Unknown | AC011944.1 |
| 0.0096183 | 9.973099 | 0.250 | 0.000 | 1.0000000 | Unknown | IL22RA2 |
| 0.0000048 | 9.857622 | 0.250 | 0.001 | 0.1231517 | Unknown | AC006230.1 |
# Day 15
singler_markers_d15 <- FindAllMarkers(day15_clstr_filt, only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.25,
# Use MAST with raw counts
assay = "RNA",
slot = "counts",
test.use = "MAST")
singler_markers_d15 %>%
group_by(cluster) %>%
slice_max(n = 5, order_by = avg_log2FC) %>%
kable(caption = "Day 15: Top 5 Markers")
| p_val | avg_log2FC | pct.1 | pct.2 | p_val_adj | cluster | gene |
|---|---|---|---|---|---|---|
| 0.0000000 | 6.9723688 | 0.425 | 0.074 | 0.00e+00 | Keratinocytes | CALML5 |
| 0.0000000 | 6.8759014 | 0.709 | 0.182 | 0.00e+00 | Keratinocytes | KRTDAP |
| 0.0000000 | 6.6643508 | 0.281 | 0.036 | 0.00e+00 | Keratinocytes | KRT2 |
| 0.0000000 | 6.5777890 | 0.797 | 0.268 | 0.00e+00 | Keratinocytes | KRT1 |
| 0.0000000 | 6.5383942 | 0.379 | 0.015 | 0.00e+00 | Keratinocytes | DSC1 |
| 0.0000000 | 8.9447666 | 0.301 | 0.001 | 0.00e+00 | Lymphoid | CTLA4 |
| 0.0000000 | 8.2351296 | 0.332 | 0.003 | 0.00e+00 | Lymphoid | GZMA |
| 0.0000000 | 8.0777485 | 0.470 | 0.003 | 0.00e+00 | Lymphoid | TRAT1 |
| 0.0000000 | 8.0570675 | 0.669 | 0.006 | 0.00e+00 | Lymphoid | CD2 |
| 0.0000000 | 8.0378760 | 0.324 | 0.002 | 0.00e+00 | Lymphoid | SH2D1A |
| 0.0000000 | 9.2205194 | 0.304 | 0.001 | 0.00e+00 | Myeloid | TLR8 |
| 0.0000000 | 8.9358580 | 0.419 | 0.004 | 0.00e+00 | Myeloid | FPR3 |
| 0.0000000 | 8.8306034 | 0.585 | 0.008 | 0.00e+00 | Myeloid | LILRB4 |
| 0.0000000 | 8.6985667 | 0.352 | 0.001 | 0.00e+00 | Myeloid | FCAR |
| 0.0000000 | 8.6876454 | 0.485 | 0.005 | 0.00e+00 | Myeloid | CD86 |
| 0.0000000 | 3.6611581 | 0.521 | 0.103 | 0.00e+00 | Epithelial | LAMB3 |
| 0.0000000 | 3.3453214 | 0.366 | 0.051 | 0.00e+00 | Epithelial | NRG1 |
| 0.0000000 | 3.3074881 | 0.475 | 0.115 | 0.00e+00 | Epithelial | LAMC2 |
| 0.0000000 | 3.0008771 | 0.335 | 0.060 | 0.00e+00 | Epithelial | P3H2 |
| 0.0000000 | 2.6492344 | 0.539 | 0.120 | 0.00e+00 | Epithelial | ITGA2 |
| 0.0000000 | 10.4252540 | 0.648 | 0.004 | 0.00e+00 | Endothelial | PLVAP |
| 0.0000000 | 10.3305135 | 0.492 | 0.001 | 0.00e+00 | Endothelial | MYCT1 |
| 0.0000000 | 10.1175197 | 0.534 | 0.002 | 0.00e+00 | Endothelial | CLDN5 |
| 0.0000000 | 10.0591864 | 0.306 | 0.002 | 0.00e+00 | Endothelial | SELE |
| 0.0000000 | 9.8988089 | 0.554 | 0.005 | 0.00e+00 | Endothelial | CCL14 |
| 0.0000000 | 7.4179411 | 0.289 | 0.008 | 0.00e+00 | Muscle tissue | LINC01060 |
| 0.0000000 | 7.0628205 | 0.403 | 0.009 | 0.00e+00 | Muscle tissue | PDE3A |
| 0.0000000 | 7.0094285 | 0.412 | 0.107 | 0.00e+00 | Muscle tissue | MMP1 |
| 0.0000000 | 6.8328116 | 0.791 | 0.034 | 0.00e+00 | Muscle tissue | PRKG1 |
| 0.0000000 | 6.6206186 | 0.273 | 0.004 | 0.00e+00 | Muscle tissue | ENPEP |
| 0.0000000 | 5.5049964 | 0.254 | 0.008 | 0.00e+00 | Connective tissue | FIBIN |
| 0.0000000 | 5.2443362 | 0.270 | 0.026 | 0.00e+00 | Connective tissue | CH25H |
| 0.0000000 | 5.1724210 | 0.302 | 0.013 | 0.00e+00 | Connective tissue | OMD |
| 0.0000000 | 5.1317790 | 0.254 | 0.008 | 0.00e+00 | Connective tissue | MMP27 |
| 0.0000000 | 4.9358588 | 0.270 | 0.011 | 0.00e+00 | Connective tissue | FREM1 |
| 0.0000000 | 10.8780240 | 0.593 | 0.012 | 0.00e+00 | Nervous tissue | TYRP1 |
| 0.0000000 | 10.0840981 | 0.593 | 0.007 | 0.00e+00 | Nervous tissue | DCT |
| 0.0000000 | 10.0059314 | 0.704 | 0.001 | 0.00e+00 | Nervous tissue | CDH19 |
| 0.0000000 | 9.8207352 | 0.593 | 0.002 | 0.00e+00 | Nervous tissue | TYR |
| 0.0000000 | 9.6972923 | 0.481 | 0.010 | 0.00e+00 | Nervous tissue | MLANA |
| 0.0000000 | 3.2906253 | 0.286 | 0.009 | 0.00e+00 | Unknown | TPSAB1 |
| 0.0000000 | 3.0944311 | 0.250 | 0.026 | 5.18e-05 | Unknown | KIT |
| 0.0001721 | 0.5231372 | 0.161 | 0.255 | 1.00e+00 | Unknown | CSKMT |
DefaultAssay(day3_clstr_filt) <- "SCT"
DefaultAssay(day15_clstr_filt) <- "SCT"
# CD45 expression
cd45_d3 <- FeaturePlot(day3_clstr_filt, features = "PTPRC", label = TRUE, repel = TRUE) +
NoLegend() +
ggtitle("Day 3: CD45 ('PTPRC')")
cd45_d3
cd45_d15 <- FeaturePlot(day15_clstr_filt, features = "PTPRC", label = TRUE, repel = TRUE) +
NoLegend() +
ggtitle("Day 15: CD45 ('PTPRC')")
cd45_d15
Day 3
make_marker_plot <- function(obj, marker) {
out <- FeaturePlot(obj, features = marker, pt.size = 0.4) +
theme(legend.position = "none",
axis.line=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
plot.title = element_text(hjust = 0.5,
size = 20,
face = "italic"))
return(out)
}
d3_1 <- make_marker_plot(day3_clstr_filt, "IL7R")
d3_2 <- make_marker_plot(day3_clstr_filt, "LYZ")
d3_3 <- make_marker_plot(day3_clstr_filt, "S100A2")
d3_4 <- make_marker_plot(day3_clstr_filt, "DCN")
d3_5 <- make_marker_plot(day3_clstr_filt, "DCT")
d3_6 <- make_marker_plot(day3_clstr_filt, "FLT1")
d3_7 <- make_marker_plot(day3_clstr_filt, "COL3A1")
d3_8 <- make_marker_plot(day3_clstr_filt, "DMKN")
d3_grid <- plot_grid(d3_1, d3_2, d3_3, d3_4, d3_5, d3_6, d3_7, d3_8, nrow = 2)
d3_grid
Day 15
d15_1 <- make_marker_plot(day15_clstr_filt, "IL7R")
d15_2 <- make_marker_plot(day15_clstr_filt, "LYZ")
d15_3 <- make_marker_plot(day15_clstr_filt, "S100A2")
d15_4 <- make_marker_plot(day15_clstr_filt, "DCN")
d15_5 <- make_marker_plot(day15_clstr_filt, "DCT")
d15_6 <- make_marker_plot(day15_clstr_filt, "FLT1")
d15_7 <- make_marker_plot(day15_clstr_filt, "COL3A1")
d15_8 <- make_marker_plot(day15_clstr_filt, "DMKN")
d15_grid <- plot_grid(d15_1, d15_2, d15_3, d15_4, d15_5, d15_6, d15_7, d15_8, nrow = 2)
d15_grid
First subset and remove current normalized counts
day3_sub <- subset(day3_clstr_filt, idents = c("Lymphoid", "Myeloid"))
DefaultAssay(day3_sub) <- "RNA"
day3_sub[["SCT"]] <- NULL
day15_sub <- subset(day15_clstr_filt, idents = c("Lymphoid", "Myeloid"))
DefaultAssay(day15_sub) <- "RNA"
day15_sub[["SCT"]] <- NULL
Same norm_and_reduce from GEX QC script.
norm_and_reduce <- function(dm_singlets) {
dm_singlets <- SCTransform(dm_singlets,
vars.to.regress = "percent.mt",
verbose = FALSE)
dm_singlets <- RunPCA(dm_singlets, verbose = FALSE)
dm_singlets <- FindNeighbors(dm_singlets, dims = 1:30)
dm_singlets <- RunUMAP(dm_singlets, dims = 1:30)
return(dm_singlets)
}
Day 3
Resolution = 0.4 is what Jolie used for the HAARVI paper and seems reasonable here.
day3_sub <- norm_and_reduce(day3_sub)
## Computing nearest neighbor graph
## Computing SNN
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 16:13:33 UMAP embedding parameters a = 0.9922 b = 1.112
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
## 16:13:33 Read 6657 rows and found 30 numeric columns
## 16:13:33 Using Annoy for neighbor search, n_neighbors = 30
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
## 16:13:33 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:13:34 Writing NN index file to temp file /tmp/RtmpFeilP1/file71b2142d7a4a
## 16:13:34 Searching Annoy index using 1 thread, search_k = 3000
## 16:13:36 Annoy recall = 100%
## 16:13:37 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:13:39 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:13:39 Commencing optimization for 500 epochs, with 280390 positive edges
## 16:13:46 Optimization finished
day3_sub_clstr <- FindClusters(day3_sub, verbose = FALSE, resolution = 0.4)
Visualize
p7 <- DimPlot(day3_sub_clstr, label = TRUE) +
NoLegend() +
ggtitle("Day 3 Seurat Sub-Clusters")
p7
Day 15
day15_sub <- norm_and_reduce(day15_sub)
## Computing nearest neighbor graph
## Computing SNN
## 16:14:02 UMAP embedding parameters a = 0.9922 b = 1.112
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
## 16:14:02 Read 2090 rows and found 30 numeric columns
## 16:14:02 Using Annoy for neighbor search, n_neighbors = 30
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
## 16:14:02 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:14:02 Writing NN index file to temp file /tmp/RtmpFeilP1/file71b2435d16ac
## 16:14:02 Searching Annoy index using 1 thread, search_k = 3000
## 16:14:03 Annoy recall = 100%
## 16:14:04 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:14:06 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:14:06 Commencing optimization for 500 epochs, with 81080 positive edges
## 16:14:09 Optimization finished
day15_sub_clstr <- FindClusters(day15_sub, verbose = FALSE, resolution = 0.4)
Visualize
p8 <- DimPlot(day15_sub_clstr, label = TRUE) +
NoLegend() +
ggtitle("Day 15 Seurat Sub-Clusters")
p8
p7 + p8
Use Azimuth (part of Seurat and recommended by recent review paper)
The web app FAQs have helpful information for interpreting results: https://azimuth.hubmapconsortium.org/#How%20can%20I%20tell%20if%20my%20mapping%20results%20are%20accurate%3f
day3_sub_annot <- RunAzimuth(day3_sub_clstr, reference = "pbmcref")
DimPlot(day3_sub_annot, group.by = "predicted.celltype.l2", label = TRUE, repel = TRUE) +
ggtitle("Day 3 Azimuth Annotations") +
NoLegend()
day15_sub_annot <- RunAzimuth(day15_sub_clstr, reference = "pbmcref")
DimPlot(day15_sub_annot, group.by = "predicted.celltype.l2", label = TRUE, repel = TRUE) +
ggtitle("Day 15 Azimuth Annotations") +
NoLegend()
To compare with Jolie’s data and make things tidier.
consol_azi <- function(in_obj) {
Idents(in_obj) <- "predicted.celltype.l2"
in_obj <- RenameIdents(object = in_obj,
"B intermediate" = "B cells",
"B memory" = "B cells",
"NK Proliferating" = "NK",
"NK_CD56bright" = "NK")
in_obj[["eb_idents"]] <- Idents(object = in_obj)
return(in_obj)
}
day3_sub_annot <- consol_azi(day3_sub_annot)
day15_sub_annot <- consol_azi(day15_sub_annot)
saveRDS(day3_sub_annot, file = file.path(script_output_dir, "processed_data/7_sub_annot_d3.rds"))
saveRDS(day15_sub_annot, file = file.path(script_output_dir, "processed_data/7_sub_annot_d15.rds"))
day3_sub_annot <- readRDS(file = file.path(script_output_dir, "processed_data/7_sub_annot_d3.rds"))
day15_sub_annot <- readRDS(file = file.path(script_output_dir, "processed_data/7_sub_annot_d15.rds"))
Get rid of the one Day 3 cell where eb_idents is NA (causes errors down the line).
na_cell <- day3_sub_annot@meta.data %>%
filter(is.na(eb_idents)) %>%
rownames()
length(na_cell)
## [1] 0
day3_sub_annot <- subset(x = day3_sub_annot, idents = na_cell, invert = TRUE)
How many cells of each type?
Day 3
length(day3_sub_annot$orig.ident) # Day 3 immune total
## [1] 6657
d3_immune_tally <- day3_sub_annot@meta.data %>%
group_by(eb_idents) %>%
tally()
d3_immune_tally %>%
kable(caption = "Day 3 immune-level")
| eb_idents | n |
|---|---|
| B cells | 62 |
| NK | 196 |
| CD8 TEM | 501 |
| ILC | 146 |
| CD14 Mono | 1981 |
| CD4 TCM | 2574 |
| CD4 TEM | 136 |
| cDC2 | 180 |
| CD8 TCM | 112 |
| dnT | 32 |
| MAIT | 71 |
| Platelet | 145 |
| Treg | 85 |
| CD16 Mono | 139 |
| CD8 Naive | 63 |
| gdT | 84 |
| pDC | 58 |
| CD8 Proliferating | 6 |
| CD4 Proliferating | 48 |
| cDC1 | 17 |
| HSPC | 18 |
| ASDC | 1 |
| CD4 CTL | 1 |
| Eryth | 1 |
table(day3_sub_annot$eb_idents, day3_sub_annot$PTID)
##
## 1 5 7 8 9 10 11 12 13 16 Doublet Negative
## B cells 6 11 1 14 9 2 0 6 1 12 0 0
## NK 28 41 12 19 17 11 0 10 33 25 0 0
## CD8 TEM 41 60 47 30 98 34 0 48 77 66 0 0
## ILC 24 31 7 3 20 11 0 16 17 17 0 0
## CD14 Mono 177 227 101 397 224 69 0 338 248 200 0 0
## CD4 TCM 139 243 114 299 279 166 0 456 423 455 0 0
## CD4 TEM 5 22 11 10 33 9 0 11 15 20 0 0
## cDC2 12 25 25 39 19 12 0 19 8 21 0 0
## CD8 TCM 9 7 7 11 21 6 0 9 27 15 0 0
## dnT 2 1 2 1 3 5 0 3 4 11 0 0
## MAIT 0 9 6 1 26 5 0 3 13 8 0 0
## Platelet 11 22 8 36 13 8 0 17 14 16 0 0
## Treg 4 15 4 10 8 4 0 11 16 13 0 0
## CD16 Mono 7 34 4 24 18 14 0 9 12 17 0 0
## CD8 Naive 2 0 1 3 12 0 0 3 24 18 0 0
## gdT 3 4 2 11 9 6 0 7 23 19 0 0
## pDC 3 14 3 4 14 1 0 2 9 8 0 0
## CD8 Proliferating 0 0 0 0 4 0 0 1 1 0 0 0
## CD4 Proliferating 12 3 1 9 5 1 0 7 6 4 0 0
## cDC1 0 4 1 8 0 0 0 2 0 2 0 0
## HSPC 2 7 3 1 0 1 0 1 2 1 0 0
## ASDC 0 0 0 0 0 0 0 0 0 1 0 0
## CD4 CTL 0 0 0 0 0 0 0 1 0 0 0 0
## Eryth 0 0 0 0 0 0 0 0 0 1 0 0
Day 15
length(day15_sub_annot$orig.ident) # Day 15 immune total
## [1] 2090
d15_immune_tally <- day15_sub_annot@meta.data %>%
group_by(eb_idents) %>%
tally()
d15_immune_tally %>%
kable(caption = "Day 15 immune-level")
| eb_idents | n |
|---|---|
| B cells | 22 |
| NK | 61 |
| CD4 TCM | 935 |
| ILC | 21 |
| Platelet | 79 |
| MAIT | 107 |
| CD14 Mono | 335 |
| CD8 TEM | 295 |
| CD4 TEM | 52 |
| Treg | 52 |
| CD8 TCM | 26 |
| cDC2 | 41 |
| pDC | 6 |
| cDC1 | 17 |
| CD8 Naive | 11 |
| dnT | 4 |
| HSPC | 14 |
| CD4 Proliferating | 10 |
| CD16 Mono | 1 |
| gdT | 1 |
table(day15_sub_annot$eb_idents, day15_sub_annot$PTID)
##
## 1 5 7 8 9 10 11 12 13 16 Doublet Negative
## B cells 2 0 0 0 10 0 2 0 6 2 0 0
## NK 10 0 0 0 15 5 17 0 8 6 0 0
## CD4 TCM 204 0 0 0 261 63 82 0 237 88 0 0
## ILC 5 0 0 0 6 3 4 0 1 2 0 0
## Platelet 26 0 0 0 13 19 9 0 4 8 0 0
## MAIT 8 0 0 0 72 2 11 0 12 2 0 0
## CD14 Mono 32 0 0 0 143 17 19 0 51 73 0 0
## CD8 TEM 64 0 0 0 72 20 58 0 51 30 0 0
## CD4 TEM 6 0 0 0 28 1 6 0 10 1 0 0
## Treg 23 0 0 0 13 2 6 0 5 3 0 0
## CD8 TCM 4 0 0 0 9 3 6 0 4 0 0 0
## cDC2 23 0 0 0 5 1 9 0 1 2 0 0
## pDC 0 0 0 0 4 0 0 0 0 2 0 0
## cDC1 9 0 0 0 3 1 3 0 1 0 0 0
## CD8 Naive 0 0 0 0 6 0 2 0 3 0 0 0
## dnT 1 0 0 0 0 1 0 0 2 0 0 0
## HSPC 2 0 0 0 2 2 3 0 2 3 0 0
## CD4 Proliferating 1 0 0 0 2 1 2 0 3 1 0 0
## CD16 Mono 0 0 0 0 1 0 0 0 0 0 0 0
## gdT 0 0 0 0 1 0 0 0 0 0 0 0
Remove types with < 5 cells
# Day 3
d3_imm_exclude <- d3_immune_tally %>%
filter(n < 5) %>%
pull(eb_idents)
print(d3_imm_exclude)
## [1] ASDC CD4 CTL Eryth
## 24 Levels: B cells NK CD8 TEM ILC CD14 Mono CD4 TCM CD4 TEM cDC2 ... Eryth
d3_subset <- subset(x = day3_sub_annot,
idents = d3_imm_exclude, invert = TRUE)
# Day 15
d15_imm_exclude <- d15_immune_tally %>%
filter(n < 5) %>%
pull(eb_idents)
print(d15_imm_exclude)
## [1] dnT CD16 Mono gdT
## 20 Levels: B cells NK CD4 TCM ILC Platelet MAIT CD14 Mono CD8 TEM ... gdT
d15_subset <- subset(x = day15_sub_annot,
idents = d15_imm_exclude, invert = TRUE)
Set factor levels and colors of immune subtypes
d3_subset@meta.data$eb_idents <- factor(d3_subset@meta.data$eb_idents,
levels = c("CD4 TCM", "CD4 TEM", "CD4 Proliferating",
"CD8 TCM", "CD8 TEM", "CD8 Proliferating", "CD8 Naive",
"Treg", "dnT", "gdT", "MAIT", "CD14 Mono", "CD16 Mono",
"cDC1", "cDC2", "pDC","ILC", "NK", "Platelet", "B cells",
"HSPC"))
# No CD8 Proliferating, gdT, CD16 Mono, unlike D3
d15_subset@meta.data$eb_idents <- factor(d15_subset@meta.data$eb_idents,
levels = c("CD4 TCM", "CD4 TEM", "CD4 Proliferating",
"CD8 TCM", "CD8 TEM", "CD8 Naive",
"Treg", "MAIT", "CD14 Mono",
"cDC1", "cDC2", "pDC","ILC", "NK", "Platelet", "B cells",
"HSPC"))
subtype_cols <- c(
"B cells" = "darkorange",
"cDC1" = "mediumpurple",
"cDC2" = "mediumpurple",
"pDC" = "mediumpurple",
"ILC" = "darkgreen",
"CD14 Mono" = "#FF61CC",
"CD16 Mono" = "#FF61CC",
"NK" = "#0CB702",
"Platelet" = "gold",
"Tcm" = "dodgerblue",
"CD8 Naive" = "#00BFC4",
"CD8 Proliferating" = "#00BFC4",
"CD8 TEM" = "#00BFC4",
"CD4 Proliferating" = "darkblue",
"CD4 TEM" = "darkblue",
"CD4 TCM" = "darkblue",
"Treg" = "cyan",
"dnT" = "cyan",
"gdT" = "cyan",
"MAIT" = "cyan",
"HSPC" = "saddlebrown")
Visualize
d3_sub_plt <- DimPlot(d3_subset, group.by = "eb_idents", pt.size = 0.4,
cols = subtype_cols) +
ggtitle("DAY 3") +
theme(plot.title = element_text(hjust = 0.5,
size = 16),
legend.position = "bottom",
legend.spacing.x = unit(0.4, 'cm'),
legend.text = element_text(size=12, margin = margin(t = 1))) +
guides(color = guide_legend(nrow = 3,
override.aes = list(size=4))) +
scale_x_discrete("UMAP1") +
scale_y_discrete("UMAP2")
d3_sub_plt
d15_sub_plt <- DimPlot(d15_subset, group.by = "eb_idents", pt.size = 0.4,
cols = subtype_cols) +
ggtitle("DAY 15") +
theme(plot.title = element_text(hjust = 0.5,
size = 16),
legend.position = "bottom",
legend.spacing.x = unit(0.4, 'cm'),
legend.text = element_text(size=12, margin = margin(t = 1))) +
guides(color = guide_legend(nrow = 3,
override.aes = list(size=4))) +
scale_x_discrete("UMAP1") +
scale_y_discrete("UMAP2")
d15_sub_plt
# Get legend
subtype_legend <- cowplot::get_legend(d3_sub_plt)
# Plots without legend
d3_sub_noleg <- DimPlot(d3_subset, group.by = "eb_idents", pt.size = 0.4,
cols = subtype_cols) +
ggtitle("DAY 3") +
theme(plot.title = element_text(hjust = 0.5, size = 16),
legend.position = "none") +
scale_x_discrete("UMAP1") +
scale_y_discrete("UMAP2")
d15_sub_noleg <- DimPlot(d15_subset, group.by = "eb_idents", pt.size = 0.4,
cols = subtype_cols) +
ggtitle("DAY 15") +
theme(plot.title = element_text(hjust = 0.5, size = 16),
legend.position = "none") +
scale_x_discrete("UMAP1") +
scale_y_discrete("UMAP2")
Save objects
saveRDS(d3_subset, file = file.path(script_output_dir, "processed_data/8_annot_sub_final_d3.rds"))
saveRDS(d15_subset, file = file.path(script_output_dir, "processed_data/8_annot_sub_final_d15.rds"))
d3_subset <- readRDS(file = file.path(script_output_dir, "processed_data/8_annot_sub_final_d3.rds"))
d15_subset <- readRDS(file = file.path(script_output_dir, "processed_data/8_annot_sub_final_d15.rds"))
Number of immune cells
length(d3_subset$orig.ident) # Day 3
## [1] 6654
length(d15_subset$orig.ident) # Day 15
## [1] 2084
Seurat clusters
d1 <- DimPlot(d3_subset, pt.size = 0.4, group.by = "seurat_clusters", label = TRUE) +
NoLegend() +
ggtitle("Day 3")
d2 <- DimPlot(d15_subset, pt.size = 0.4, group.by = "seurat_clusters", label = TRUE) +
NoLegend() +
ggtitle("Day 15")
d1 + d2
By PTID
ptid_colors <- c("1" = "salmon", "5" = "orange", "7" = "#7570B3",
"8" = "#0CB702", "9" = "#A6761D", "10" = "#00BFC4",
"11" = "#C77CFF", "12" = "#FF61CC", "13" = "darkgreen",
"16" = "darkblue")
d3 <- DimPlot(d3_subset, pt.size = 0.4, group.by = "PTID", cols = ptid_colors) +
NoLegend() +
ggtitle("Day 3")
d4 <- DimPlot(d15_subset, pt.size = 0.4, group.by = "PTID", cols = ptid_colors) +
NoLegend() +
ggtitle("Day 15")
d3 + d4
By ARM
d5 <- DimPlot(d3_subset, pt.size = 0.4, group.by = "ARM") +
NoLegend() +
ggtitle("Day 3")
d6 <- DimPlot(d15_subset, pt.size = 0.4, group.by = "ARM") +
NoLegend() +
ggtitle("Day 15")
d5 + d6
Look at confidence and mapping scores. Note that Azimuth doesn’t have neutrophils in its reference.
ft1 <- FeaturePlot(d3_subset, features = "predicted.celltype.l2.score",
label = TRUE,
repel = TRUE,
pt.size = 0.4) +
ggtitle("Day 3 confidence scores") +
NoLegend()
ft2 <- FeaturePlot(d15_subset, features = "predicted.celltype.l2.score",
label = TRUE,
repel = TRUE,
pt.size = 0.4) +
ggtitle("Day 15 confidence scores")
cowplot::plot_grid(ft1, ft2)
ft3 <- FeaturePlot(d3_subset, features = "mapping.score",
label = TRUE,
repel = TRUE,
pt.size = 0.6) +
ggtitle("Day 3 mapping scores") +
NoLegend()
ft4 <- FeaturePlot(d15_subset, features = "mapping.score",
label = TRUE,
repel = TRUE,
pt.size = 0.6) +
ggtitle("Day 15 mapping scores")
cowplot::plot_grid(ft3, ft4)
Neutrophils aren’t in the Azimuth references, and so they’d be annotated as CD14 monocytes (possibly with low mapping scores). See:
Here, I check for low mapping scores and neutrophil markers in CD14 monocytes (none express CEACAM5). There doesn’t seem to be a defined neutrophil population at either timepoint
d3_cd14mono <- subset(d3_subset, subset = eb_idents == "CD14 Mono")
d15_cd14mono <- subset(d15_subset, subset = eb_idents == "CD14 Mono")
Mapping scores
ft5 <- FeaturePlot(d3_cd14mono, features = "mapping.score", pt.size = 0.6,
label = TRUE) +
ggtitle("Day 3 mapping scores") +
NoLegend()
ft6 <- FeaturePlot(d15_cd14mono, features = "mapping.score", pt.size = 0.6,
label = TRUE) +
ggtitle("Day 15 mapping scores")
cowplot::plot_grid(ft5, ft6)
Neutrophil marker expression
# No clear pattern, especially not connected to the mapping scores
ft7 <- FeaturePlot(d3_cd14mono, features = c("CEACAM1", "CEACAM6", "ITGAM",
"FCGR3A", "FUT4", "AZU1",
"ELANE", "MPO"), pt.size = 0.1)
ft8 <- FeaturePlot(d15_cd14mono, features = c("CEACAM1", "CEACAM6", "ITGAM",
"FCGR3A", "FUT4", "AZU1",
"ELANE", "MPO"), pt.size = 0.1)
cowplot::plot_grid(ft7, labels = "Day 3")
cowplot::plot_grid(ft8, labels = "Day 15")
Day 3
d3_11 <- make_marker_plot(d3_subset, "CD3D")
d3_12 <- make_marker_plot(d3_subset, "CTSS")
d3_13 <- make_marker_plot(d3_subset, "HLA-DPA1")
d3_14 <- make_marker_plot(d3_subset, "NKG7")
d3_15 <- make_marker_plot(d3_subset, "MS4A1")
d3_grid_imm <- plot_grid(d3_11, d3_12, d3_13, d3_14, d3_15, nrow = 3)
d3_grid_imm + DimPlot(d3_subset, pt.size = 0.4, group.by = "eb_idents",
cols = subtype_cols)
Day 15
# TODO: Decide on final order to make plots (maybe look at mast cells earlier, include those in immmune subset and see where they cluster for further confirmation they should be their own thing)
d15_11 <- make_marker_plot(d15_subset, "CD3D")
d15_12 <- make_marker_plot(d15_subset, "CTSS")
d15_13 <- make_marker_plot(d15_subset, "HLA-DPA1")
# Some monocytes expressing this, but not the other markers for NK-like monocytes
d15_14 <- make_marker_plot(d15_subset, "NKG7")
d15_15 <- make_marker_plot(d15_subset, "MS4A1")
d15_grid_imm <- plot_grid(d15_11, d15_12, d15_13, d15_14, d15_15, nrow = 2)
d15_grid_imm + DimPlot(d15_subset, pt.size = 0.4, group.by = "eb_idents",
cols = subtype_cols)
Following instructions from issue #1748. I’ll replace the SingleR_prunedlabels with the more detailed Azimuth ones to create a ‘detailed’ meta.data column. I’ll replace my consolidated SingleR annotations with my consolidated Azimuth ones to create a ‘consolidated’ meta.data column.
replace_labels <- function(main_obj, sub_obj) {
# 'detailed' labels
Idents(main_obj) <- "SingleR_prunedlabels"
Idents(sub_obj) <- "predicted.celltype.l2"
main_obj$detailed <- as.character(Idents(main_obj))
main_obj$detailed[Cells(sub_obj)] <- as.character(Idents(sub_obj))
# 'consolidated' labels
Idents(main_obj) <- "eb_atlas_idents"
Idents(sub_obj) <- "eb_idents"
main_obj$consolidated <- as.character(Idents(main_obj))
main_obj$consolidated[Cells(sub_obj)] <- as.character(Idents(sub_obj))
return(main_obj)
}
day3_clstr_filt <- replace_labels(day3_clstr_filt, d3_subset)
day15_clstr_filt <- replace_labels(day15_clstr_filt, d15_subset)
Day 15 Unknowns
One group of cells seem to be mast cells based on:
These are part of a Seurat cluster that’s spread around the UMAP, so I’ll re-annotate based on TPSAB1 expression.
mastplt <- FeaturePlot(day15_clstr_filt, features = "TPSAB1")
mastplt
# Get and look at the cells expressing high levels of TPSAB1
mast_cells <- WhichCells(day15_clstr_filt, expression = TPSAB1 > 1.1)
DimPlot(day15_clstr_filt, cells.highlight = list(mast_cells))
# Re-annotate
day15_clstr_filt@meta.data <- day15_clstr_filt@meta.data %>%
mutate(barcode = rownames(.)) %>%
mutate(consolidated = case_when(
barcode %in% mast_cells ~ "Mast cells",
.default = consolidated
))
Note that there doesn’t seem to be a mast cell cluster at Day 3.
FeaturePlot(day3_clstr_filt, features = "TPSAB1")
Use Azimuth annotations to decide if a cell is Myeloid or Lymphoid
# Leave these cell types as-is: HSPC, pDC, cDC1, cDC2
myeloid_type <- c("CD14 Mono", "CD16 Mono", "Platelet", "Mast cells")
lymphoid_type <- c("B cells", "CD4 Proliferating", "CD4 TCM", "CD4 TEM",
"CD8 Naive", "CD8 Proliferating", "CD8 TCM", "CD8 TEM",
"dnT", "gdT", "MAIT", "NK", "ILC", "Treg")
# Day 3
day3_clstr_filt@meta.data <- day3_clstr_filt@meta.data %>%
mutate(eb_atlas_idents = case_when(
consolidated %in% myeloid_type ~ "Myeloid",
consolidated %in% lymphoid_type ~ "Lymphoid",
.default = eb_atlas_idents
))
# Day 15
day15_clstr_filt@meta.data <- day15_clstr_filt@meta.data %>%
mutate(eb_atlas_idents = case_when(
consolidated %in% myeloid_type ~ "Myeloid",
consolidated %in% lymphoid_type ~ "Lymphoid",
.default = eb_atlas_idents
))
Save
# These are necessary for the next script
saveRDS(day3_clstr_filt, file = file.path(script_output_dir, "processed_data/9_final_annot_d3.rds"))
saveRDS(day15_clstr_filt, file = file.path(script_output_dir, "processed_data/9_final_annot_d15.rds"))
sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: America/Los_Angeles
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] cowplot_1.1.1 Azimuth_0.5.0
## [3] shinyBS_0.61.1 knitr_1.45
## [5] celldex_1.10.1 SingleR_2.2.0
## [7] SummarizedExperiment_1.30.2 Biobase_2.60.0
## [9] GenomicRanges_1.52.1 GenomeInfoDb_1.36.4
## [11] IRanges_2.34.1 S4Vectors_0.38.2
## [13] BiocGenerics_0.46.0 MatrixGenerics_1.12.3
## [15] matrixStats_1.0.0 lubridate_1.9.3
## [17] forcats_1.0.0 stringr_1.5.0
## [19] dplyr_1.1.3 purrr_1.0.2
## [21] readr_2.1.5 tidyr_1.3.1
## [23] tibble_3.2.1 ggplot2_3.4.4
## [25] tidyverse_2.0.0 Seurat_5.0.1
## [27] SeuratObject_5.0.1 sp_2.1-1
##
## loaded via a namespace (and not attached):
## [1] R.methodsS3_1.8.2 progress_1.2.3
## [3] poweRlaw_0.70.6 goftest_1.2-3
## [5] DT_0.30 Biostrings_2.68.1
## [7] vctrs_0.6.4 spatstat.random_3.2-1
## [9] pbmcref.SeuratData_1.0.0 digest_0.6.33
## [11] png_0.1-8 ggrepel_0.9.4
## [13] deldir_1.0-9 parallelly_1.36.0
## [15] lungref.SeuratData_2.0.0 MASS_7.3-60.0.1
## [17] Signac_1.12.0 MAST_1.26.0
## [19] reshape2_1.4.4 httpuv_1.6.12
## [21] withr_3.0.0 xfun_0.40
## [23] ellipsis_0.3.2 survival_3.5-8
## [25] EnsDb.Hsapiens.v86_2.99.0 memoise_2.0.1
## [27] zoo_1.8-12 gtools_3.9.4
## [29] pbapply_1.7-2 R.oo_1.25.0
## [31] prettyunits_1.2.0 KEGGREST_1.40.1
## [33] promises_1.2.1 cbmc.SeuratData_3.1.4
## [35] httr_1.4.7 restfulr_0.0.15
## [37] globals_0.16.2 fitdistrplus_1.1-11
## [39] rhdf5filters_1.12.1 rhdf5_2.44.0
## [41] rstudioapi_0.15.0 miniUI_0.1.1.1
## [43] generics_0.1.3 pbmc3k.SeuratData_3.1.4
## [45] curl_5.0.2 zlibbioc_1.46.0
## [47] ScaledMatrix_1.8.1 polyclip_1.10-6
## [49] GenomeInfoDbData_1.2.10 ExperimentHub_2.8.1
## [51] interactiveDisplayBase_1.38.0 xtable_1.8-4
## [53] pracma_2.4.2 evaluate_0.22
## [55] S4Arrays_1.2.0 BiocFileCache_2.8.0
## [57] hms_1.1.3 irlba_2.3.5.1
## [59] colorspace_2.1-0 filelock_1.0.2
## [61] hdf5r_1.3.8 ROCR_1.0-11
## [63] reticulate_1.34.0 spatstat.data_3.0-3
## [65] magrittr_2.0.3 lmtest_0.9-40
## [67] glmGamPoi_1.12.2 later_1.3.1
## [69] viridis_0.6.4 lattice_0.22-5
## [71] spatstat.geom_3.2-7 future.apply_1.11.0
## [73] scattermore_1.2 XML_3.99-0.14
## [75] RcppAnnoy_0.0.21 pillar_1.9.0
## [77] nlme_3.1-163 caTools_1.18.2
## [79] compiler_4.3.3 beachmat_2.16.0
## [81] RSpectra_0.16-1 stringi_1.8.3
## [83] tensor_1.5 GenomicAlignments_1.36.0
## [85] plyr_1.8.9 crayon_1.5.2
## [87] abind_1.4-5 BiocIO_1.10.0
## [89] googledrive_2.1.1 bit_4.0.5
## [91] fastmatch_1.1-4 codetools_0.2-19
## [93] BiocSingular_1.16.0 bslib_0.5.1
## [95] SeuratData_0.2.2.9001 plotly_4.10.3
## [97] mime_0.12 splines_4.3.3
## [99] Rcpp_1.0.11 fastDummies_1.7.3
## [101] dbplyr_2.3.4 sparseMatrixStats_1.12.2
## [103] cellranger_1.1.0 blob_1.2.4
## [105] utf8_1.2.4 here_1.0.1
## [107] BiocVersion_3.17.1 seqLogo_1.68.0
## [109] AnnotationFilter_1.26.0 fs_1.6.3
## [111] listenv_0.9.0 DelayedMatrixStats_1.22.6
## [113] panc8.SeuratData_3.0.2 Matrix_1.6-4
## [115] tzdb_0.4.0 pkgconfig_2.0.3
## [117] pheatmap_1.0.12 tools_4.3.3
## [119] cachem_1.0.8 RSQLite_2.3.1
## [121] viridisLite_0.4.2 DBI_1.2.1
## [123] fastmap_1.1.1 rmarkdown_2.25
## [125] scales_1.3.0 grid_4.3.3
## [127] ica_1.0-3 ifnb.SeuratData_3.0.0
## [129] shinydashboard_0.7.2 Rsamtools_2.16.0
## [131] AnnotationHub_3.8.0 sass_0.4.7
## [133] patchwork_1.1.3 BiocManager_1.30.22
## [135] dotCall64_1.1-0 RANN_2.6.1
## [137] farver_2.1.1 yaml_2.3.8
## [139] rtracklayer_1.60.1 cli_3.6.2
## [141] leiden_0.4.3 lifecycle_1.0.4
## [143] uwot_0.1.16 presto_1.0.0
## [145] BSgenome.Hsapiens.UCSC.hg38_1.4.5 BiocParallel_1.34.2
## [147] annotate_1.78.0 timechange_0.3.0
## [149] gtable_0.3.4 rjson_0.2.21
## [151] ggridges_0.5.4 progressr_0.14.0
## [153] parallel_4.3.3 jsonlite_1.8.7
## [155] RcppHNSW_0.5.0 TFBSTools_1.40.0
## [157] bitops_1.0-7 bit64_4.0.5
## [159] Rtsne_0.16 spatstat.utils_3.0-4
## [161] CNEr_1.38.0 jquerylib_0.1.4
## [163] highr_0.10 shinyjs_2.1.0
## [165] SeuratDisk_0.0.0.9020 R.utils_2.12.2
## [167] lazyeval_0.2.2 shiny_1.7.5.1
## [169] htmltools_0.5.7 GO.db_3.17.0
## [171] sctransform_0.4.1 rappdirs_0.3.3
## [173] ensembldb_2.26.0 glue_1.7.0
## [175] TFMPvalue_0.0.9 spam_2.10-0
## [177] googlesheets4_1.1.1 XVector_0.40.0
## [179] RCurl_1.98-1.12 rprojroot_2.0.3
## [181] BSgenome_1.70.1 gridExtra_2.3
## [183] JASPAR2020_0.99.10 igraph_1.5.1
## [185] R6_2.5.1 SingleCellExperiment_1.22.0
## [187] RcppRoll_0.3.0 labeling_0.4.3
## [189] GenomicFeatures_1.54.1 cluster_2.1.6
## [191] Rhdf5lib_1.22.1 gargle_1.5.2
## [193] DirichletMultinomial_1.44.0 DelayedArray_0.26.7
## [195] tidyselect_1.2.0 ProtGenerics_1.34.0
## [197] xml2_1.3.5 AnnotationDbi_1.62.2
## [199] future_1.33.0 rsvd_1.0.5
## [201] munsell_0.5.0 KernSmooth_2.23-22
## [203] data.table_1.15.0 htmlwidgets_1.6.2
## [205] RColorBrewer_1.1-3 biomaRt_2.58.0
## [207] rlang_1.1.1 spatstat.sparse_3.0-3
## [209] spatstat.explore_3.2-5 fansi_1.0.6